# Crawler that downloads videos from Pear Video (pearvideo.com)
import requests
import re  #正则表达式
from bs4 import BeautifulSoup
import time
from lxml import etree #xpath解析模块
# --- Crawl configuration ----------------------------------------------------
# Category listing endpoint; ``start`` is the paging offset.
LIST_URL = 'https://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=5&start={start}&mrd=0.9590819541259208'
# Endpoint queried per video; the content id is appended to it.
STATUS_URL = 'https://www.pearvideo.com/videoStatus.jsp?contId='
SITE_ROOT = 'https://www.pearvideo.com/'

PAGE_COUNT = 20        # number of listing pages to walk
PAGE_STEP = 12         # the paging offset advances by this much per page
DOWNLOAD_DELAY = 2     # seconds to wait before each video
REQUEST_TIMEOUT = 30   # seconds; without a timeout a stalled connection hangs forever

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
# NOTE(review): this looks like a session cookie captured from a browser; it
# presumably expires and may need refreshing - confirm before relying on it.
COOKIE = '__secdyid=0f911ef86e61b86bdf0a8d04df36a3df741ee4450f515dde021653897786; acw_tc=2f624a2416538977867994512e7f6df3511667444dd7a7a6220b3cfccd99fb; JSESSIONID=977FAE2118DAFA95C21DCCFEA15EC4F5; PEAR_UUID=20be81b5-0767-4a32-a362-cb49482d0238; _uab_collina=165389778816738646344618; Hm_lvt_9707bc8d5f6bba210e7218b8496f076a=1653897788; p_h5_u=905EFC7B-DFC8-4FDF-83DF-47624756CC42; Hm_lpvt_9707bc8d5f6bba210e7218b8496f076a=1653899232; SERVERID=bacac21aafa9027952fdc46518c0c74f|1653899455|1653897786'

# Headers for the listing request; built once because they never change.
LIST_HEADERS = {
    'User-Agent': USER_AGENT,
    'Cookie': COOKIE,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Host': 'www.pearvideo.com',
}

# Characters dropped from titles: the full-width punctuation the script has
# always removed, plus ASCII characters that are path separators or illegal in
# Windows file names (titles are remote text and end up in a file path).
_TITLE_TABLE = str.maketrans({
    **dict.fromkeys('“”？：！…《》' + '\\/:*?"<>|'),
    '，': ',',  # full-width comma becomes an ASCII comma
})


def clean_title(title):
    """Return *title* with problematic punctuation removed.

    Full-width quotes, question/exclamation marks, colons, ellipses and book
    title marks are dropped, a full-width comma becomes ``,``, and the ASCII
    characters ``\\ / : * ? " < > |`` are dropped so the result is safe to use
    as a file name stem.
    """
    return title.translate(_TITLE_TABLE)


def extract_cont_id(href):
    """Return the text following ``video_`` in *href*, or ``None`` if absent."""
    match = re.search(r'video_(.*)', href)
    return match.group(1) if match else None


def extract_src_url(status_text):
    """Return the first ``"srcUrl":"..."`` value found in *status_text*.

    Raises:
        ValueError: if the text contains no ``srcUrl`` field.
    """
    match = re.search(r'"srcUrl":"(.*?)"', status_text)
    if match is None:
        raise ValueError('no srcUrl field in status response')
    return match.group(1)


def resolve_video_url(src_url, cont_id):
    """Swap the numeric path segment of *src_url* for ``cont-<cont_id>``.

    The first ``/<digits>-`` occurrence is rewritten to ``/cont-<cont_id>-``.
    The previous implementation only recognised segments starting with
    ``165`` (apparently a timestamp - TODO confirm), which fails for any other
    leading digits; any all-digit segment is accepted here.

    Raises:
        ValueError: if *src_url* has no ``/<digits>-`` segment.
    """
    # A callable replacement keeps cont_id literal (no backslash processing).
    rewritten, hits = re.subn(
        r'/\d+-', lambda _m: f'/cont-{cont_id}-', src_url, count=1)
    if not hits:
        raise ValueError(f'unexpected srcUrl format: {src_url}')
    return rewritten


def fetch_listing(start):
    """Fetch one listing page and return ``(hrefs, titles)``.

    *hrefs* are the link targets of the video cards; *titles* are the cleaned
    titles in the same order. An empty or unparsable page yields two empty
    lists.
    """
    response = requests.get(LIST_URL.format(start=start),
                            headers=LIST_HEADERS, timeout=REQUEST_TIMEOUT)
    page_text = response.text
    if not page_text.strip():
        return [], []
    tree = etree.HTML(page_text)
    if tree is None:
        return [], []
    hrefs = tree.xpath('//div[@class="vervideo-bd"]/a/@href')
    text_nodes = tree.xpath('//div[@class="vervideo-bd"]/a/div/text()[1]')
    # The script has always read titles from the odd positions (1, 3, 5, ...)
    # of this result - presumably the XPath yields a filler text node before
    # each title; TODO confirm against the live markup.
    titles = [clean_title(node) for node in text_nodes[1::2]]
    return hrefs, titles


def download_video(href, title):
    """Resolve the real media URL for *href* and save it as ``<title>.mp4``.

    Raises:
        ValueError: if the video id or the media URL cannot be extracted.
        requests.RequestException: on network or HTTP errors.
        OSError: if the output file cannot be written.
    """
    cont_id = extract_cont_id(href)
    if cont_id is None:
        raise ValueError(f'no video id in link: {href}')
    headers = {
        'User-Agent': USER_AGENT,
        'Cookie': COOKIE,
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Referer': SITE_ROOT + href,
    }
    status = requests.get(STATUS_URL + cont_id, headers=headers,
                          timeout=REQUEST_TIMEOUT)
    status.raise_for_status()
    video_url = resolve_video_url(extract_src_url(status.text), cont_id)
    print(video_url)
    # Stream to disk in chunks so a large video is never held in memory, and
    # check the status first so an error page is not saved as an .mp4.
    with requests.get(video_url, stream=True,
                      timeout=REQUEST_TIMEOUT) as response:
        response.raise_for_status()
        with open(title + '.mp4', 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                out_file.write(chunk)


def main():
    """Walk the listing pages and download every video found on them."""
    for page in range(PAGE_COUNT):
        start = page * PAGE_STEP
        print(start)
        try:
            hrefs, titles = fetch_listing(start)
        except requests.RequestException as err:
            print(f'listing page at offset {start} failed: {err}')
            continue
        print(hrefs)
        if len(hrefs) != len(titles):
            # zip() below stops at the shorter list; report the mismatch
            # instead of failing with an IndexError halfway through the page.
            print(f'warning: {len(hrefs)} links but {len(titles)} titles')
        for href, title in zip(hrefs, titles):
            time.sleep(DOWNLOAD_DELAY)  # throttle requests to the site
            print(title)
            try:
                download_video(href, title)
            except (requests.RequestException, OSError, ValueError) as err:
                # One bad video should not abort the whole crawl.
                print(f'skipped {href}: {err}')


if __name__ == '__main__':
    main()